library(tidyverse)
library(tidync)
library(tidyterra)
library(tidylog)
library(sp)
library(raster)
library(devtools)
library(RCurl)
library(sdmTMB)
library(terra)
library(ncdf4)
library(chron)
library(viridis)
library(readxl)
library(ggsidekick); theme_set(theme_sleek()) # devtools::install_github("seananderson/ggsidekick") # not on CRAN
# Source code for map plots
devtools::source_url("https://raw.githubusercontent.com/maxlindmark/pred-prey-overlap/main/R/functions/map-plot.R")
# Set path
home <- here::here()Make the prediction grid
Intro
Make an evenly spaced UTM prediction grid with all spatially varying covariates for the diet and the biomass data
Read data and depth-raster
# Read data
d <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/spatial-metabolic-index/main/data/clean/catch_clean.csv") |>
rename(X = x, Y = y) Make the grid with depth
First make a grid for the biomass data, then subset that based on the extend of the stomach data
x <- d$X
y <- d$Y
z <- chull(x, y)
coords <- cbind(x[z], y[z])
coords <- rbind(coords, coords[1, ])
plot(coords[, 1] ~ coords[, 2]) # plot datasp_poly <- sp::SpatialPolygons(
list(sp::Polygons(list(sp::Polygon(coords)), ID = 1))
)
sp_poly_df <- sp::SpatialPolygonsDataFrame(sp_poly,
data = data.frame(ID = 1)
)
cell_width <- 3
pred_grid <- expand.grid(
X = seq(min(d$X), max(d$X), cell_width),
Y = seq(min(d$Y), max(d$Y), cell_width),
year = c(1993:2023)
)
ggplot(pred_grid |> filter(year == 2019), aes(X, Y)) +
geom_point(size = 0.1) +
theme_void() +
coord_sf()sp::coordinates(pred_grid) <- c("X", "Y")
inside <- !is.na(sp::over(pred_grid, as(sp_poly_df, "SpatialPolygons")))
pred_grid <- pred_grid[inside, ]
pred_grid <- as.data.frame(pred_grid)
ggplot(data = filter(pred_grid, year == 1999), aes(X*1000, Y*1000)) +
geom_point(size = 0.001, alpha = 0.5) +
NULLplot_map +
geom_point(data = filter(pred_grid, year == 1999), aes(X*1000, Y*1000), size = 0.001, alpha = 0.5) +
NULLWarning: Removed 2229 rows containing missing values or values outside the scale range
(`geom_point()`).
# Add lat and lon
# Need to go from UTM to lat long for this one...
# https://stackoverflow.com/questions/30018098/how-to-convert-utm-coordinates-to-lat-and-long-in-r
xy <- as.matrix(pred_grid |> dplyr::select(X, Y) |> mutate(X = X*1000, Y = Y*1000))
v <- vect(xy, crs="+proj=utm +zone=33 +datum=WGS84 +units=m")
y <- project(v, "+proj=longlat +datum=WGS84")
lonlat <- geom(y)[, c("x", "y")]
pred_grid$lon <- lonlat[, 1]
pred_grid$lat <- lonlat[, 2]
ggplot(filter(pred_grid, year == 1999), aes(lon, lat)) + geom_point()# Add depth now to remove islands and remaining land
# https://gis.stackexchange.com/questions/411261/read-multiple-layers-raster-from-ncdf-file-using-terra-package
# https://emodnet.ec.europa.eu/geoviewer/
dep_raster <- terra::rast(paste0(home, "/data/covariates/depth/Mean depth natural colour (with land).nc"))
class(dep_raster)[1] "SpatRaster"
attr(,"package")
[1] "terra"
crs(dep_raster, proj = TRUE)[1] "+proj=longlat +datum=WGS84 +no_defs"
plot(dep_raster)pred_grid$depth <- terra::extract(dep_raster, pred_grid |> dplyr::select(lon, lat))$elevation
ggplot(pred_grid, aes(lon, lat, color = depth*-1)) +
geom_point()pred_grid$depth <- pred_grid$depth*-1
pred_grid <- pred_grid |> drop_na(depth)
pred_grid |>
filter(year == 1999) |>
drop_na(depth) |>
#mutate(water = ifelse(depth < 0.00000001, "N", "Y")) |>
ggplot(aes(X*1000, Y*1000, fill = depth)) +
geom_raster() +
NULLplot_map +
geom_point(data = pred_grid, aes(X*1000, Y*1000), size = 0.001) +
geom_sf()Warning: Removed 34875 rows containing missing values or values outside the scale range
(`geom_point()`).
plot_map +
geom_raster(data = filter(pred_grid, year == 1999), aes(X*1000, Y*1000, fill = depth), size = 0.001) +
geom_sf()Warning in geom_raster(data = filter(pred_grid, year == 1999), aes(X * 1000, :
Ignoring unknown parameters: `size`
Warning: Removed 1134 rows containing missing values or values outside the scale range
(`geom_raster()`).
Add month_year variable for matching with raster layers
dat <- replicate_df(pred_grid, "quarter", c(1, 4)) |>
mutate(month = ifelse(quarter == 1, 2, 11), # most common months
month_year = paste(month, year, sep = "_"))Add ICES areas
# https://stackoverflow.com/questions/34272309/extract-shapefile-value-to-point-with-r
# https://gis.ices.dk/sf/
shape <- shapefile(paste0(home, "/data/shapefiles/ICES-StatRec-mapto-ICES-Areas/StatRec_map_Areas_Full_20170124.shp"))
head(shape) OBJECTID ID ICESNAME SOUTH WEST NORTH EAST AREA_KM2 stat_x stat_y Area_27
1 1 47 47A0 59.0 -44 59.5 -43 3178 -43.5 59.25 14.b.2
2 2 48 48A0 59.5 -44 60.0 -43 3132 -43.5 59.75 14.b.2
3 3 49 49A0 60.0 -44 60.5 -43 3085 -43.5 60.25 14.b.2
4 4 50 50A0 60.5 -44 61.0 -43 3038 -43.5 60.75 14.b.2
5 5 51 51A0 61.0 -44 61.5 -43 2991 -43.5 61.25 14.b.2
6 6 52 52A0 61.5 -44 62.0 -43 2944 -43.5 61.75 14.b.2
Perc MaxPer RNDMaxPer AreasList Shape_Leng Shape_Area
1 100.00000000 100.0000000 100 14.b.2 3 0.5
2 84.12674145 84.1267414 84 14.b.2 3 0.5
3 24.99803694 24.9980369 25 14.b.2 3 0.5
4 11.97744244 11.9774424 12 14.b.2 3 0.5
5 3.89717625 3.8971762 4 14.b.2 3 0.5
6 0.28578428 0.2857843 0 14.b.2 3 0.5
pts <- SpatialPoints(cbind(dat$lon, dat$lat),
proj4string = CRS(proj4string(shape)))
dat$subdiv <- over(pts, shape)$Area_27
# Rename subdivisions to the more common names and do some more filtering (by sub div and area)
sort(unique(dat$subdiv))[1] "3.b.23" "3.d.24" "3.d.25" "3.d.26" "3.d.27" "3.d.28.1" "3.d.28.2"
dat <- dat |>
mutate(sub_div = factor(subdiv),
sub_div = fct_recode(subdiv,
"24" = "3.d.24",
"25" = "3.d.25",
"26" = "3.d.26",
"27" = "3.d.27",
"28" = "3.d.28.1",
"28" = "3.d.28.2",
"29" = "3.d.29"),
sub_div = as.character(sub_div)) |>
filter(sub_div %in% c("24", "25", "26", "27", "28", 2)) |>
filter(lat > 54 & lat < 59 & lon < 22)Warning: There was 1 warning in `.fun()`.
ℹ In argument: `sub_div = fct_recode(...)`.
Caused by warning:
! Unknown levels in `f`: 3.d.29
# Add ICES rectangles
dat$ices_rect <- mapplots::ices.rect2(lon = dat$lon, lat = dat$lat)
dat <- dat |> dplyr::select(-subdiv)Add Saduria
saduria <- terra::rast(paste0(home, "/data/covariates/saduria/FWBiomassm_raster_19812019presHighweightcor_no0_newZi.tif"))
WGS84 <- "+proj=longlat +datum=WGS84"
saduria_latlon <- terra::project(saduria, WGS84)
density_saduria <- terra::extract(saduria_latlon, dat |> dplyr::select(lon, lat))
dat$saduria <- density_saduria$FWBiomassm_raster_19812019presHighweightcor_no0_newZiAdd pelagic
# Read data on rectangle level
spr <- read_xlsx(paste0(home, "/data/covariates/pelagic/N and B per Rect. 1991-2023.xlsx"),
sheet = 4) |>
rename(ices_rect = RECT,
year = ANNUS) |>
mutate_at(vars(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`), ~replace_na(., 0)) |> # I need to replace NA with 0, else I can't sum! According to Olavi who sent the data, NA means 0 and nothing else. Rectangle*year combinations that do not have information about biomass are simply not included in this data
mutate(ices_rect = as.factor(ices_rect),
Species = "Sprat",
biomass_spr = `1`+`2`+`3`+`4`+`5`+`6`+`7`+`8`,
id_r = paste(ices_rect, year, sep = "_")) |>
filter(year >= 1993) |>
dplyr::select(year, ices_rect, biomass_spr) |>
summarise(biomass_spr = mean(biomass_spr), .by = c(ices_rect, year))
her <- read_xlsx(paste0(home, "/data/covariates/pelagic/N and B per Rect. 1991-2023.xlsx"),
sheet = 3) |>
rename(ices_rect = RECT,
year = ANNUS) |>
mutate_at(vars(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`), ~replace_na(., 0)) |> # I need to replace NA with 0, else I can't sum! According to Olavi who sent the data, NA means 0 and nothing else. Rectangle*year combinations that do not have information about biomass are simply not included in this data
mutate(ices_rect = as.factor(ices_rect),
Species = "Sprat",
biomass_her = `1`+`2`+`3`+`4`+`5`+`6`+`7`+`8`,
id_r = paste(ices_rect, year, sep = "_")) |>
filter(year >= 1993) |>
dplyr::select(year, ices_rect, biomass_her) |>
summarise(biomass_her = mean(biomass_her), .by = c(ices_rect, year))
pelagics <- left_join(spr, her, by = c("year", "ices_rect"))
dat <- dat |> tidylog::left_join(pelagics, by = c("year", "ices_rect"))
# If there are no data in a specific rectangle, use the sub-division mean. If no values in the sub div, use annual mean
dat <- dat |>
mutate(biomass_spr_sd_mean = mean(biomass_spr, na.rm = TRUE),
biomass_her_sd_mean = mean(biomass_her, na.rm = TRUE),
.by = c(sub_div, year)) |>
mutate(biomass_spr = ifelse(is.na(biomass_spr), biomass_spr_sd_mean, biomass_spr),
biomass_her = ifelse(is.na(biomass_her), biomass_her_sd_mean, biomass_her)) |>
mutate(biomass_spr_yr_mean = mean(biomass_spr, na.rm = TRUE),
biomass_her_yr_mean = mean(biomass_her, na.rm = TRUE),
.by = c(year)) |>
mutate(biomass_spr = ifelse(is.na(biomass_spr), biomass_spr_yr_mean, biomass_spr),
biomass_her = ifelse(is.na(biomass_her), biomass_her_yr_mean, biomass_her))Add environmental covariates
Oxygen
covPath <- paste0(home, "/data/covariates")
# Source:
# https://data.marine.copernicus.eu/product/BALTICSEA_ANALYSISFORECAST_BGC_003_007/download?dataset=cmems_mod_bal_bgc_anfc_P1M-m_202311
# https://data.marine.copernicus.eu/product/BALTICSEA_MULTIYEAR_BGC_003_012/download?dataset=cmems_mod_bal_bgc_my_P1M-m_202303
# Print details
print(nc_open(paste(covPath, "oxygen", "cmems_mod_bal_bgc_my_P1M-m_1718018848983.nc", sep = "/")))File /Users/maxlindmark/Dropbox/Max work/R/pred-prey-overlap/data/covariates/oxygen/cmems_mod_bal_bgc_my_P1M-m_1718018848983.nc (NC_FORMAT_NETCDF4):
1 variables (excluding dimension variables):
float o2b[longitude,latitude,time] (Contiguous storage)
units: mmol m-3
_FillValue: NaN
standard_name: mole_concentration_of_dissolved_molecular_oxygen_in_sea_water
long_name: Dissolved Oxygen Concentration
3 dimensions:
latitude Size:404
standard_name: latitude
long_name: Latitude
units: degrees_north
unit_long: Degrees North
axis: Y
valid_min: 53.0082969665527
valid_max: 65.8909912109375
longitude Size:505
standard_name: longitude
long_name: Longitude
units: degrees_east
unit_long: Degrees East
axis: X
time Size:348
standard_name: time
long_name: Time
units: seconds since 1970-01-01 00:00:00
calendar: gregorian
axis: T
12 global attributes:
Conventions: CF-1.11
title: CMEMS ERGOM monthly integrated model fields
institution: Baltic MFC, PU Danish Meteorological Institute
source: CMEMS BAL MFC NEMO model output converted to NetCDF
history: Fri Dec 16 10:51:30 2022: cdo -z zip_1 --sortname copy /net/isilon/ifs/arch/home/iri/BALMFC/Nemo4.0/BALMFCMYP2022_univariateSST//BAL-ERGOM_BGC-DailyMeans-19930101.nc tmp_19930101.nc
contact: servicedesk.cmems@mercator-ocean.eu
references: https://marine.copernicus.eu/
comment: Data on cropped native product grid. Horizontal velocities destagged
subset:source: ARCO data downloaded from the Marine Data Store using the MyOcean Data Portal
subset:productId: BALTICSEA_MULTIYEAR_BGC_003_012
subset:datasetId: cmems_mod_bal_bgc_my_P1M-m_202303
subset:date: 2024-06-10T11:27:28.983Z
oxy_tibble <- tidync(paste(covPath, "oxygen",
"cmems_mod_bal_bgc_my_P1M-m_1718018848983.nc", sep = "/")) |>
hyper_tibble() |>
mutate(date = as_datetime(time, origin = '1970-01-01')) |>
mutate(month = month(date),
day = day(date),
year = year(date),
month_year = paste(month, year, sep = "_"),
oxy = o2b * 0.0223909)
# Now do recent data (forecast)
print(nc_open(paste(covPath, "oxygen", "cmems_mod_bal_bgc_anfc_P1M-m_1718022118172.nc", sep = "/")))File /Users/maxlindmark/Dropbox/Max work/R/pred-prey-overlap/data/covariates/oxygen/cmems_mod_bal_bgc_anfc_P1M-m_1718022118172.nc (NC_FORMAT_NETCDF4):
1 variables (excluding dimension variables):
float o2b[longitude,latitude,time] (Contiguous storage)
units: mmol m-3
_FillValue: NaN
standard_name: mole_concentration_of_dissolved_molecular_oxygen_in_sea_water
long_name: Dissolved Oxygen Concentration
3 dimensions:
latitude Size:323
standard_name: latitude
long_name: Latitude
units: degrees_north
unit_long: Degrees North
axis: Y
valid_min: 53.0082969665527
valid_max: 65.8909912109375
longitude Size:456
standard_name: longitude
long_name: Longitude
units: degrees_east
unit_long: Degrees East
axis: X
time Size:30
standard_name: time
long_name: Time
units: seconds since 1970-01-01 00:00:00
calendar: gregorian
axis: T
11 global attributes:
Conventions: CF-1.11
title: CMEMS ERGOM monthly integrated model fields
institution: Baltic MFC, PU Swedish Meteorological and Hydrological Institute
source: CMEMS BAL MFC NEMO model output converted to NetCDF
contact: servicedesk.cmems@mercator-ocean.eu
references: https://marine.copernicus.eu/
comment: Data on cropped native product grid. Horizontal velocities destagged
subset:source: ARCO data downloaded from the Marine Data Store using the MyOcean Data Portal
subset:productId: BALTICSEA_ANALYSISFORECAST_BGC_003_007
subset:datasetId: cmems_mod_bal_bgc_anfc_P1M-m_202311
subset:date: 2024-06-10T12:21:58.172Z
oxy_tibble_new <- tidync(paste(covPath, "oxygen",
"cmems_mod_bal_bgc_anfc_P1M-m_1718022118172.nc", sep = "/")) |>
hyper_tibble() |>
mutate(date = as_datetime(time, origin = '1970-01-01')) |>
mutate(month = month(date),
day = day(date),
year = year(date),
month_year = paste(month, year, sep = "_"),
oxy = o2b * 0.0223909) |>
filter(year > 2021)
oxy_tibble <- bind_rows(oxy_tibble, oxy_tibble_new)
# Loop through all year combos, extract the temperatures at the data locations
oxy_list <- list()
for(i in unique(dat$month_year)) {
d_sub <- filter(dat, month_year == i)
oxy_tibble_sub <- filter(oxy_tibble, month_year == i)
# Convert to raster
ggplot(oxy_tibble_sub, aes(longitude, latitude)) +
geom_point(size = 0.1)
oxy_raster <- as_spatraster(oxy_tibble_sub, xycols = 2:3,
crs = "WGS84", digits = 3)
ggplot() +
geom_spatraster(data = oxy_raster$oxy, aes(fill = oxy)) +
scale_fill_viridis(option = "magma") +
ggtitle(i)
# Extract from raster
d_sub$oxy <- terra::extract(oxy_raster$oxy,
d_sub |> dplyr::select(lon, lat))$oxy
# Save
oxy_list[[i]] <- d_sub
}
d_oxy <- bind_rows(oxy_list)Temperature & Salinity
# Source
# https://data.marine.copernicus.eu/product/BALTICSEA_MULTIYEAR_PHY_003_011/description
# https://data.marine.copernicus.eu/product/BALTICSEA_ANALYSISFORECAST_PHY_003_006/description
# Print details
print(nc_open(paste(covPath, "temperature_salinity", "cmems_mod_bal_phy_my_P1M-m_1718087629163.nc", sep = "/")))File /Users/maxlindmark/Dropbox/Max work/R/pred-prey-overlap/data/covariates/temperature_salinity/cmems_mod_bal_phy_my_P1M-m_1718087629163.nc (NC_FORMAT_NETCDF4):
2 variables (excluding dimension variables):
float bottomT[longitude,latitude,time] (Contiguous storage)
units: degrees_C
_FillValue: NaN
standard_name: sea_water_potential_temperature_at_sea_floor
long_name: Sea floor potential temperature
float sob[longitude,latitude,time] (Contiguous storage)
units: 0.001
_FillValue: NaN
standard_name: sea_water_salinity_at_sea_floor
long_name: Sea water salinity at sea floor
3 dimensions:
latitude Size:323
standard_name: latitude
long_name: Latitude
units: degrees_north
unit_long: Degrees North
axis: Y
valid_min: 53.0082969665527
valid_max: 65.8909912109375
longitude Size:418
standard_name: longitude
long_name: Longitude
units: degrees_east
unit_long: Degrees East
axis: X
time Size:348
standard_name: time
long_name: Time
units: seconds since 1970-01-01 00:00:00
calendar: gregorian
axis: T
11 global attributes:
Conventions: CF-1.11
title: CMEMS NEMO monthly integrated model fields
institution: Baltic MFC, PU Danish Meteorological Institute
source: CMEMS BAL MFC NEMO model output converted to NetCDF
contact: servicedesk.cmems@mercator-ocean.eu
references: https://marine.copernicus.eu/
comment: Data on cropped native product grid. Horizontal velocities destagged
subset:source: ARCO data downloaded from the Marine Data Store using the MyOcean Data Portal
subset:productId: BALTICSEA_MULTIYEAR_PHY_003_011
subset:datasetId: cmems_mod_bal_phy_my_P1M-m_202303
subset:date: 2024-06-11T06:33:49.163Z
st_tibble <- tidync(paste(covPath, "temperature_salinity",
"cmems_mod_bal_phy_my_P1M-m_1718087629163.nc", sep = "/")) |>
hyper_tibble() |>
mutate(date = as_datetime(time, origin = '1970-01-01')) |>
mutate(month = month(date),
day = day(date),
year = year(date),
month_year = paste(month, year, sep = "_"))
# Now do recent data (forecast)
print(nc_open(paste(covPath, "temperature_salinity", "cmems_mod_bal_phy_anfc_P1M-m_1718087127439.nc", sep = "/")))File /Users/maxlindmark/Dropbox/Max work/R/pred-prey-overlap/data/covariates/temperature_salinity/cmems_mod_bal_phy_anfc_P1M-m_1718087127439.nc (NC_FORMAT_NETCDF4):
2 variables (excluding dimension variables):
float bottomT[longitude,latitude,time] (Contiguous storage)
units: degrees_C
_FillValue: NaN
standard_name: sea_water_potential_temperature_at_sea_floor
long_name: Sea water potential temperature at sea floor (given for depth comprise between 0 and 500m)
float sob[longitude,latitude,time] (Contiguous storage)
units: 0.001
_FillValue: NaN
standard_name: sea_water_salinity_at_sea_floor
long_name: Sea water salinity at sea floor
3 dimensions:
latitude Size:395
standard_name: latitude
long_name: Latitude
units: degrees_north
unit_long: Degrees North
axis: Y
valid_min: 53.0082969665527
valid_max: 65.8909912109375
longitude Size:426
standard_name: longitude
long_name: Longitude
units: degrees_east
unit_long: Degrees East
axis: X
time Size:30
standard_name: time
long_name: Time
units: seconds since 1970-01-01 00:00:00
calendar: gregorian
axis: T
11 global attributes:
Conventions: CF-1.11
title: CMEMS NEMO monthly integrated model fields
institution: Baltic MFC, PU Swedish Meteorological and Hydrological Institute
source: CMEMS BAL MFC NEMO model output converted to NetCDF
contact: servicedesk.cmems@mercator-ocean.eu
references: https://marine.copernicus.eu/
comment: Data on cropped native product grid. Horizontal velocities destagged
subset:source: ARCO data downloaded from the Marine Data Store using the MyOcean Data Portal
subset:productId: BALTICSEA_ANALYSISFORECAST_PHY_003_006
subset:datasetId: cmems_mod_bal_phy_anfc_P1M-m_202311
subset:date: 2024-06-11T06:25:27.439Z
st_tibble_new <- tidync(paste(covPath, "temperature_salinity",
"cmems_mod_bal_phy_anfc_P1M-m_1718087127439.nc", sep = "/")) |>
hyper_tibble() |>
mutate(date = as_datetime(time, origin = '1970-01-01')) |>
mutate(month = month(date),
day = day(date),
year = year(date),
month_year = paste(month, year, sep = "_")) |>
filter(year > 2021)
st_tibble <- bind_rows(st_tibble, st_tibble_new)
# Loop through all year combos, extract the temperatures at the data locations
st_list <- list()
for(i in unique(dat$month_year)) {
d_sub <- filter(dat, month_year == i)
st_tibble_sub <- filter(st_tibble, month_year == i)
# Convert to raster
ggplot(st_tibble_sub, aes(longitude, latitude)) +
geom_point(size = 0.1)
st_raster <- as_spatraster(st_tibble_sub, xycols = 3:4,
crs = "WGS84", digits = 3)
ggplot() +
geom_spatraster(data = st_raster$bottomT, aes(fill = bottomT)) +
scale_fill_viridis(option = "magma") +
ggtitle(i)
ggplot() +
geom_spatraster(data = st_raster$sob, aes(fill = sob)) +
scale_fill_viridis(option = "magma") +
ggtitle(i)
# Extract from raster
d_sub$temp <- terra::extract(st_raster$bottomT,
d_sub |> dplyr::select(lon, lat))$bottomT
d_sub$salinity <- terra::extract(st_raster$sob,
d_sub |> dplyr::select(lon, lat))$sob
# Save
st_list[[i]] <- d_sub
}
d_st <- bind_rows(st_list)env_dat <- d_st |>
left_join(d_oxy |> dplyr::select(-X, -Y, -depth, -quarter, -year, -month),
by = c("month_year", "lon", "lat")) |>
dplyr::select(month_year, lon, lat, temp, salinity, oxy)# Now join these data with the full_dat
dat_full <- left_join(dat, env_dat, by = c("month_year", "lon", "lat")) |>
drop_na(temp, salinity, oxy)Save
# Remove variables and save
pred_grid_93_08 <- dat_full |> filter(year <= 2008)
pred_grid_09_23 <- dat_full |> filter(year >= 2009)
write_csv(pred_grid_93_08, file = paste0(home, "/data/clean/pred_grid_(1_2).csv"))
write_csv(pred_grid_09_23, file = paste0(home, "/data/clean/pred_grid_(2_2).csv"))